Install Spark

Extract the Spark tarball and set the environment variables

cd /opt
tar zxvf spark-3.2.1-bin-hadoop3.2.tgz
mv spark-3.2.1-bin-hadoop3.2 spark
chown -R hadoop:hadoop spark
# quote the delimiter so $SPARK_HOME and $PATH are written literally instead of being expanded now
cat > /etc/profile.d/spark.sh << 'EOF'
export SPARK_HOME=/opt/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
EOF
source /etc/profile
su hadoop
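
To confirm the variables took effect for the hadoop user, a quick check can help before going on (spark-submit --version only prints build information and does not need a running cluster):

echo $SPARK_HOME
spark-submit --version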

Configure spark-env.sh

cat >> $SPARK_HOME/conf/spark-env.sh << 'EOF'
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
export SPARK_HOME=/opt/spark
export SPARK_CONF_DIR=/opt/spark/conf
export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
EOF
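
Note that the PYTHONPATH line above only adds $SPARK_HOME/python. If you intend to import pyspark from a plain system Python session (rather than through bin/pyspark), the bundled py4j zip under $SPARK_HOME/python/lib usually needs to be on PYTHONPATH as well; a minimal sketch, assuming a single py4j zip in that directory:

export PYTHONPATH=$SPARK_HOME/python:$(ls $SPARK_HOME/python/lib/py4j-*-src.zip):$PYTHONPATH
python -c "import pyspark; print(pyspark.__version__)"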

Configure spark-defaults.conf

cat >> $SPARK_HOME/conf/spark-defaults.conf << EOF
spark.master yarn
spark.driver.memory 2g
spark.executor.memory 3g
spark.eventLog.enabled true
spark.eventLog.dir hdfs:///spark-logs
spark.yarn.historyServer.address 192.168.122.24:18080
spark.history.fs.logDirectory hdfs:///spark-logs
spark.driver.extraClassPath /opt/hadoop/share/hadoop/common/lib/*
spark.executor.extraClassPath /opt/hadoop/share/hadoop/common/lib/*
spark.executorEnv.PYSPARK_PYTHON /usr/bin/python
EOF

Create the Spark event log directory on HDFS

hdfs dfs -mkdir -p hdfs:///spark-logs
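
With hdfs:///spark-logs in place, the Spark History Server configured in spark-defaults.conf above can be started as the hadoop user. It is optional for running jobs, but the UI at 192.168.122.24:18080 will not show past applications until it is running:

$SPARK_HOME/sbin/start-history-server.sh

Finished applications should then be browsable at http://192.168.122.24:18080.
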
Verify the local Spark environment

spark-sql -e "show databases"
*****
*****
2025-01-13 21:53:50,519 INFO hive.metastore: Opened a connection to metastore, current connections: 1
2025-01-13 21:53:50,575 INFO hive.metastore: Connected to metastore.
Spark master: yarn, Application Id: application_1736775772310_0002
2025-01-13 21:53:50,789 INFO thriftserver.SparkSQLCLIDriver: Spark master: yarn, Application Id: application_1736775772310_0002
2025-01-13 21:53:53,869 INFO codegen.CodeGenerator: Code generated in 167.916501 ms
2025-01-13 21:53:53,927 INFO codegen.CodeGenerator: Code generated in 12.675728 ms
default
Time taken: 3.139 seconds, Fetched 1 row(s)
2025-01-13 21:53:53,980 INFO thriftserver.SparkSQLCLIDriver: Time taken: 3.139 seconds, Fetched 1 row(s)
2025-01-13 21:53:54,026 INFO server.AbstractConnector: Stopped Spark@58b0dfee{HTTP/1.1, (http/1.1)}{0.0.0.0:4040}
2025-01-13 21:53:54,032 INFO ui.SparkUI: Stopped Spark web UI at http://192.168.122.24:4040
2025-01-13 21:53:54,043 INFO cluster.YarnClientSchedulerBackend: Interrupting monitor thread
2025-01-13 21:53:54,098 INFO cluster.YarnClientSchedulerBackend: Shutting down all executors
2025-01-13 21:53:54,099 INFO cluster.YarnSchedulerBackend$YarnDriverEndpoint: Asking each executor to shut down
2025-01-13 21:53:54,114 INFO cluster.YarnClientSchedulerBackend: YARN client scheduler backend Stopped
2025-01-13 21:53:54,229 INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
2025-01-13 21:53:54,255 INFO memory.MemoryStore: MemoryStore cleared
2025-01-13 21:53:54,256 INFO storage.BlockManager: BlockManager stopped
2025-01-13 21:53:54,282 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
2025-01-13 21:53:54,288 INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
2025-01-13 21:53:54,310 INFO spark.SparkContext: Successfully stopped SparkContext
2025-01-13 21:53:54,310 INFO util.ShutdownHookManager: Shutdown hook called
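
As a further check that jobs run end to end on YARN, the bundled SparkPi example can be submitted in client mode; a sketch, assuming the stock Scala 2.12 build of Spark 3.2.1 (the examples jar name varies with the Scala version of the build):

spark-submit --master yarn --deploy-mode client \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.12-3.2.1.jar 100

A line such as "Pi is roughly 3.14..." should appear in the console output among the INFO messages.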